* The project root is the first positional argument of this do-file
global root_dir "`1'"

* Shared configuration (presumably defines log_dir, code_dir, dataset_dir and
* final_dir used below - not visible here, verify against config.do)
include "${root_dir}/code/config/config.do"


* Open a named log; -capture noisily- prints any error but lets the script go on
capture noisily log using ${log_dir}/df_predictingweights.log, replace name(dat)

* Positional arguments 2-5 carry the placeholder ___EMPTY___ when the caller
* has nothing to pass; translate that placeholder into an empty string and
* store the result in the globals arg1-arg4.
forvalues slot = 1/4 {
    local pos = `slot' + 1
    global arg`slot' = cond("``pos''" == "___EMPTY___", "", "``pos''")
}

* A non-empty argument overrides the matching global; an empty argument
* leaves whatever value that global already holds untouched.
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* Echo the weight type that is in effect, whether overridden or not
display "${wtype}"
capture noisily {

* v4
* Input: the primary regression dataset.
* Output: the data for the table_predictingweights exercise (table A12).
* country_list.do presumably defines the global countrylist1995 that the
* loops further down iterate over - verify against that file.
do ${code_dir}/config/country_list.do

* Build firm-level domestic and foreign weights
* Start from the list of firms, one row per BvD
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD
duplicates drop

* Attach the 1995 weights; unmatched(master) keeps firms that have no
* weights record
mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
drop _merge

* Largest share across all share2_all_1995_* columns. Stored as double so
* that the == and != comparisons against the share variables below are
* exact whatever the storage type of the shares (a float copy of a double
* share would generally not compare equal to it).
egen double maxshare = rowmax(share2_all_1995_*)
gen country_shr_1995  = ""
gen weight_domestic  = .

* The domestic country is the country holding the largest share.
* NOTE(review): when several countries tie for the largest share, the last
* one in countrylist1995 ends up as the domestic country - confirm intended.
foreach ctry of global countrylist1995 {
	replace country_shr_1995 = "`ctry'" if maxshare == share2_all_1995_`ctry'
	replace weight_domestic = share2_all_1995_`ctry' if maxshare == share2_all_1995_`ctry'
}

* Flag domestic-only firms (largest share equals 1, so no foreign weights)
gen domestic = (maxshare == 1)

* Foreign weights: each non-domestic share rescaled by the total foreign
* share (the complement of the domestic weight). Left missing for any
* country whose share equals the largest share.
* NOTE(review): a country tied with the domestic one therefore also gets a
* missing foreign weight - confirm intended.
gen share_foreign = 1 - weight_domestic
foreach ctry of global countrylist1995 {
	gen weight_foreign_`ctry' = share2_all_1995_`ctry' / share_foreign if maxshare != share2_all_1995_`ctry'
}

* Some formatting: one row per firm and country
reshape long share2_all_1995_ weight_foreign_, i(BvD) j(country, string)
rename share2_all_1995_ weight
rename weight_foreign_ weight_foreign
sort BvD country

* Numeric country key, later used to merge the country growth rates.
* NOTE(review): the ids follow the sorted order of the country codes found
* here, so they only line up with the ids built from ctry_code in the
* growth-rate section if both see exactly the same set of codes - confirm.
egen country_id = group(country)

* Keep only the identifiers, the weights and the domestic-only flag
keep BvD country_id weight weight_foreign domestic
tempfile firms_and_foreign_weights
save `firms_and_foreign_weights', replace

* Build country-level growth rates in LSW/HSW for 1995 to 2000, for our main deflator
foreach vv in lsw hsw {
	use ${dataset_dir}/indep_vars/`vv'ages_wide_MANUF.dta, clear
	keep year `vv'MP*
	keep if year == 1995 | year == 2000

	* Bare -reshape long- relies on the reshape settings stored with the
	* dataset; the long-form variables used below are ctry_code and `vv'MP_
	reshape long
	gen ln`vv'MP = log(`vv'MP_)
	keep year ctry_code ln`vv'MP

	* Numeric country key used to merge onto the firm-level weights.
	* NOTE(review): ids follow the sorted order of ctry_code, so they only
	* match the ids built from the weight columns if both sources contain
	* exactly the same set of country codes - confirm.
	egen country_id = group(ctry_code)

	* Annualised 5-year log growth rate, 1995 to 2000. The lag is taken
	* explicitly within country (rows ordered by year) rather than with the
	* L5. operator, so the result does not depend on tsset/xtset settings
	* that this script never declares. Only a 2000 row directly preceded by
	* the 1995 row of the same country gets a value.
	bysort country_id (year): gen g_`vv'_5yr_1995 = (ln`vv'MP - ln`vv'MP[_n-1]) / 5 if year == 2000 & year[_n-1] == 1995
	keep if year == 2000
	keep country_id g*
	tempfile g_`vv'_5yr_1995
	save `g_`vv'_5yr_1995', replace
}

* Merge the dataset-parts together
use ${final_dir}/regression_dataset${weight_window}_${wtype}.dta, clear

* Restrict to the firms listed in bvd_list_regfirms_auto95.dta
mmerge BvD using ${final_dir}/bvd_list_regfirms_auto95.dta, unmatched(master)
keep if _merge == 3

* Total of the dependent variable per lse_id over the years 1997-2011; the
* second egen spreads that total to every row of the same lse_id
bys lse_id : egen _total_${depvar}_${ttt}_1995 = total(${depvar}_${ttt}) if year>=1995+2 & year <= 2009+2
bys lse_id : egen total_${depvar}_${ttt}_1995 = max(_total_${depvar}_${ttt}_1995)
keep BvD total_${depvar}_${ttt}_1995
duplicates drop

* Add the firm-by-country weights, keeping matched firms only
mmerge BvD using `firms_and_foreign_weights', unmatched(master)
keep if _merge == 3

* Add the country growth rates; countries without one keep missing values
mmerge country_id using `g_lsw_5yr_1995', unmatched(master)
mmerge country_id using `g_hsw_5yr_1995', unmatched(master)

* Diagnostic only: count the distinct firms. Sorting by BvD and country_id
* in the same step leaves the saved file in a deterministic order.
bysort BvD (country_id): gen nvals = _n == 1
count if nvals == 1

* Use the same macro-built name under which the total was created above,
* so the keep still works when depvar/ttt are not auto95/bia
keep BvD country_id total_${depvar}_${ttt}_1995 weight* g_*_5yr_1995

 
save ${final_dir}/regression_dataset_predictingweights_${wtype}.dta, replace


}
* _rc holds the return code captured from the block above (0 means no error)
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the named log; ignore the error if it is not open
capture log close dat